{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bd076d38",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T06:37:28.747521Z",
     "start_time": "2021-11-18T06:37:28.206499Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import h2o\n",
    "from h2o.automl import H2OAutoML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a2315d54",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T06:37:33.890896Z",
     "start_time": "2021-11-18T06:37:28.749498Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.\n",
      "Attempting to start a local H2O server...\n",
      "  Java Version: java version \"1.8.0_11\"; Java(TM) SE Runtime Environment (build 1.8.0_11-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.11-b03, mixed mode)\n",
      "  Starting server from /mnt/disk0/home/caihengxing/anaconda3/envs/python37/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar\n",
      "  Ice root: /tmp/tmp9od02eru\n",
      "  JVM stdout: /tmp/tmp9od02eru/h2o_caihengxing_started_from_python.out\n",
      "  JVM stderr: /tmp/tmp9od02eru/h2o_caihengxing_started_from_python.err\n",
      "  Server is running at http://127.0.0.1:54321\n",
      "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n",
      "Warning: Your H2O cluster version is too old (4 months and 9 days)! Please download and install the latest version from http://h2o.ai/download/\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O_cluster_uptime:</td>\n",
       "<td>01 secs</td></tr>\n",
       "<tr><td>H2O_cluster_timezone:</td>\n",
       "<td>Asia/Shanghai</td></tr>\n",
       "<tr><td>H2O_data_parsing_timezone:</td>\n",
       "<td>UTC</td></tr>\n",
       "<tr><td>H2O_cluster_version:</td>\n",
       "<td>3.32.1.4</td></tr>\n",
       "<tr><td>H2O_cluster_version_age:</td>\n",
       "<td>4 months and 9 days !!!</td></tr>\n",
       "<tr><td>H2O_cluster_name:</td>\n",
       "<td>H2O_from_python_caihengxing_nqdasd</td></tr>\n",
       "<tr><td>H2O_cluster_total_nodes:</td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O_cluster_free_memory:</td>\n",
       "<td>26.67 Gb</td></tr>\n",
       "<tr><td>H2O_cluster_total_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_allowed_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_status:</td>\n",
       "<td>accepting new members, healthy</td></tr>\n",
       "<tr><td>H2O_connection_url:</td>\n",
       "<td>http://127.0.0.1:54321</td></tr>\n",
       "<tr><td>H2O_connection_proxy:</td>\n",
       "<td>{\"http\": null, \"https\": null}</td></tr>\n",
       "<tr><td>H2O_internal_security:</td>\n",
       "<td>False</td></tr>\n",
       "<tr><td>H2O_API_Extensions:</td>\n",
       "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4</td></tr>\n",
       "<tr><td>Python_version:</td>\n",
       "<td>3.7.10 final</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  ------------------------------------------------------------------\n",
       "H2O_cluster_uptime:         01 secs\n",
       "H2O_cluster_timezone:       Asia/Shanghai\n",
       "H2O_data_parsing_timezone:  UTC\n",
       "H2O_cluster_version:        3.32.1.4\n",
       "H2O_cluster_version_age:    4 months and 9 days !!!\n",
       "H2O_cluster_name:           H2O_from_python_caihengxing_nqdasd\n",
       "H2O_cluster_total_nodes:    1\n",
       "H2O_cluster_free_memory:    26.67 Gb\n",
       "H2O_cluster_total_cores:    40\n",
       "H2O_cluster_allowed_cores:  40\n",
       "H2O_cluster_status:         accepting new members, healthy\n",
       "H2O_connection_url:         http://127.0.0.1:54321\n",
       "H2O_connection_proxy:       {\"http\": null, \"https\": null}\n",
       "H2O_internal_security:      False\n",
       "H2O_API_Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4\n",
       "Python_version:             3.7.10 final\n",
       "--------------------------  ------------------------------------------------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "h2o.init()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "38e00f10",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T06:37:35.830619Z",
     "start_time": "2021-11-18T06:37:35.826653Z"
    }
   },
   "outputs": [],
   "source": [
    "train_path = '../data/categorical_feature_encoding_challenge/train.csv'\n",
    "test_path = '../data/categorical_feature_encoding_challenge/test.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "041e1995",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T06:37:42.169184Z",
     "start_time": "2021-11-18T06:37:36.883602Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n",
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "# Load data into H2O\n",
    "train = h2o.import_file(train_path)\n",
    "test  = h2o.import_file(test_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3dd5ee1e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T06:37:42.314632Z",
     "start_time": "2021-11-18T06:37:42.171937Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th style=\"text-align: right;\">  id</th><th style=\"text-align: right;\">  bin_0</th><th style=\"text-align: right;\">  bin_1</th><th style=\"text-align: right;\">  bin_2</th><th>bin_3  </th><th>bin_4  </th><th>nom_0  </th><th>nom_1    </th><th>nom_2  </th><th>nom_3     </th><th>nom_4   </th><th>nom_5    </th><th>nom_6    </th><th>nom_7    </th><th>nom_8    </th><th>nom_9    </th><th style=\"text-align: right;\">  ord_0</th><th>ord_1      </th><th>ord_2      </th><th>ord_3  </th><th>ord_4  </th><th>ord_5  </th><th style=\"text-align: right;\">  day</th><th style=\"text-align: right;\">  month</th><th style=\"text-align: right;\">  target</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td style=\"text-align: right;\">   0</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td>T      </td><td>Y      </td><td>Green  </td><td>Triangle </td><td>Snake  </td><td>Finland   </td><td>Bassoon </td><td>50f116bcf</td><td>3ac1b8814</td><td>68f6ad3e9</td><td>c389000ab</td><td>2f4cb3d51</td><td style=\"text-align: right;\">      2</td><td>Grandmaster</td><td>Cold       </td><td>h      </td><td>D      </td><td>kr     </td><td style=\"text-align: right;\">    2</td><td style=\"text-align: right;\">      2</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   1</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">      0</td><td>T      </td><td>Y      </td><td>Green  </td><td>Trapezoid</td><td>Hamster</td><td>Russia    </td><td>Piano   </td><td>b3b4d25d0</td><td>fbcb50fc1</td><td>3b6dd5612</td><td>4cd920251</td><td>f83c56c21</td><td style=\"text-align: right;\">      1</td><td>Grandmaster</td><td>Hot        </td><td>a      </td><td>A      </td><td>bF     </td><td style=\"text-align: right;\">    7</td><td style=\"text-align: right;\">      8</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   2</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td>F      </td><td>Y      </td><td>Blue   </td><td>Trapezoid</td><td>Lion   </td><td>Russia    </td><td>Theremin</td><td>3263bdce5</td><td>0922e3cb8</td><td>a6a36f527</td><td>de9c9f684</td><td>ae6800dd0</td><td style=\"text-align: right;\">      1</td><td>Expert     </td><td>Lava Hot   </td><td>h      </td><td>R      </td><td>Jc     </td><td style=\"text-align: right;\">    7</td><td style=\"text-align: right;\">      2</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   3</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">      0</td><td>F      </td><td>Y      </td><td>Red    </td><td>Trapezoid</td><td>Snake  </td><td>Canada    </td><td>Oboe    </td><td>f12246592</td><td>50d7ad46a</td><td>ec69236eb</td><td>4ade6ab69</td><td>8270f0d71</td><td style=\"text-align: right;\">      1</td><td>Grandmaster</td><td>Boiling Hot</td><td>i      </td><td>D      </td><td>kW     </td><td style=\"text-align: right;\">    2</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">       1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   4</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td>F      </td><td>N      </td><td>Red    </td><td>Trapezoid</td><td>Lion   </td><td>Canada    </td><td>Oboe    </td><td>5b0f5acd5</td><td>1fe17a1fd</td><td>04ddac2be</td><td>cb43ab175</td><td>b164b72a7</td><td style=\"text-align: right;\">      1</td><td>Grandmaster</td><td>Freezing   </td><td>a      </td><td>R      </td><td>qP     </td><td style=\"text-align: right;\">    7</td><td style=\"text-align: right;\">      8</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   5</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">      1</td><td>T      </td><td>N      </td><td>Blue   </td><td>Polygon  </td><td>Lion   </td><td>Costa Rica</td><td>Oboe    </td><td>46cab09da</td><td>29a854620</td><td>ff5b35098</td><td>b7e6f8e6f</td><td>51e27c16d</td><td style=\"text-align: right;\">      1</td><td>Novice     </td><td>Freezing   </td><td>j      </td><td>E      </td><td>PZ     </td><td style=\"text-align: right;\">    2</td><td style=\"text-align: right;\">      2</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   6</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">      1</td><td>T      </td><td>N      </td><td>Green  </td><td>Trapezoid</td><td>Cat    </td><td>China     </td><td>Piano   </td><td>be5592604</td><td>3393a0f78</td><td>c6587685d</td><td>06f5ae149</td><td>7e3d79a0d</td><td style=\"text-align: right;\">      2</td><td>Grandmaster</td><td>Lava Hot   </td><td>g      </td><td>P      </td><td>wy     </td><td style=\"text-align: right;\">    5</td><td style=\"text-align: right;\">      4</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   7</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      1</td><td>T      </td><td>Y      </td><td>Red    </td><td>Triangle </td><td>Dog    </td><td>Russia    </td><td>Oboe    </td><td>72f8028dc</td><td>55eed5058</td><td>2dd9daf45</td><td>98addc2c9</td><td>feb72ecc2</td><td style=\"text-align: right;\">      1</td><td>Novice     </td><td>Lava Hot   </td><td>j      </td><td>K      </td><td>Ed     </td><td style=\"text-align: right;\">    4</td><td style=\"text-align: right;\">      2</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   8</td><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      1</td><td>T      </td><td>Y      </td><td>Blue   </td><td>Square   </td><td>Hamster</td><td>Canada    </td><td>Bassoon </td><td>4604905e7</td><td>3e44d44eb</td><td>3f0057c9b</td><td>a2d110837</td><td>34a7273bf</td><td style=\"text-align: right;\">      2</td><td>Novice     </td><td>Boiling Hot</td><td>e      </td><td>V      </td><td>qo     </td><td style=\"text-align: right;\">    3</td><td style=\"text-align: right;\">      4</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   9</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">      0</td><td>F      </td><td>Y      </td><td>Red    </td><td>Trapezoid</td><td>Lion   </td><td>China     </td><td>Piano   </td><td>ad95dc0ee</td><td>8ed6221ae</td><td>4fbfe4a84</td><td>2c15d0173</td><td>0ece7a511</td><td style=\"text-align: right;\">      1</td><td>Expert     </td><td>Freezing   </td><td>h      </td><td>Q      </td><td>CZ     </td><td style=\"text-align: right;\">    3</td><td style=\"text-align: right;\">      2</td><td style=\"text-align: right;\">       0</td></tr>\n",
       "</tbody>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "47d55d05",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T06:37:57.856885Z",
     "start_time": "2021-11-18T06:37:57.852675Z"
    }
   },
   "outputs": [],
   "source": [
    "y = \"target\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f5bc32f0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T08:30:41.881501Z",
     "start_time": "2021-11-18T06:38:02.736390Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AutoML progress: |████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "aml2 = H2OAutoML(max_runtime_secs = 7200, seed = 1, project_name = \"kaggle_categorical_feature_encoding_challenge\")\n",
    "aml2.train(y = y, training_frame = train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dc204b55",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T08:32:09.201060Z",
     "start_time": "2021-11-18T08:30:41.884540Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "stackedensemble prediction progress: |████████████████████████████████████| 100%\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/caihengxing/anaconda3/envs/python37/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'nom_8' has levels not trained on: [\"1f0a80e1d\", \"2be51c868\", \"a9bf3dc47\", \"ec337ce4c\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/envs/python37/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'nom_9' has levels not trained on: [\"006d59dba\", \"06b629fcd\", \"0c4a1cf34\", \"0cc35e186\", \"11ac3a364\", \"16c23cafe\", \"17638b95e\", \"248d87c9d\", \"2497c8bcb\", \"25b4ca9bb\", ...67 not listed..., \"e45838711\", \"e7a93ce71\", \"e8d79201b\", \"e9d7cba73\", \"ea57886d6\", \"ec5b80639\", \"f2afe9840\", \"fa260216c\", \"fa6771844\", \"fc198175f\"]\n",
      "  warnings.warn(w)\n"
     ]
    }
   ],
   "source": [
    "pred = aml2.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b7416fc2",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T08:32:10.122777Z",
     "start_time": "2021-11-18T08:32:09.203912Z"
    }
   },
   "outputs": [],
   "source": [
    "sub = pd.read_csv(test_path)\n",
    "sub = sub[['id']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "7c77bf3a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T08:32:11.268183Z",
     "start_time": "2021-11-18T08:32:10.124529Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "pred = h2o.as_list(pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "12c104df",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T08:32:11.274561Z",
     "start_time": "2021-11-18T08:32:11.269908Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "sub[y] = pred['predict']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "50f9c69c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-18T08:32:12.090309Z",
     "start_time": "2021-11-18T08:32:11.275925Z"
    }
   },
   "outputs": [],
   "source": [
    "sub.to_csv(\"./h2o_sub_kaggle_categorical_feature_encoding_challenge.csv\", index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "523d4a1a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
